{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Value Iteration algorithm uses the calculated utilities of all the states and compares them after an equilibrium is reached to calculate which is the best move to be taken. The algorithm reaches an equlibrium and this can be known using a stopping criteria. The stopping criteria taken is when no state's utility gets changed by much between two consecutive iterations."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Implementing the Value Iteration algorithm:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "def state_utility(v, T, u, reward, gamma):\n",
    "    \n",
    "    #v is the state vector\n",
    "    #T is the transition matrix\n",
    "    #u is the utility vector\n",
    "    #reward consists of the rewards earned for moving to a particular state\n",
    "    #gamma is the discount factor by which rewards are discounted over the time\n",
    "\n",
    "    action_array = np.zeros(4)\n",
    "    for action in range(0, 4):\n",
    "        action_array[action] = np.sum(np.multiply(u, np.dot(v, T[:,:,action])))\n",
    "    return reward + gamma * np.max(action_array)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "=================== FINAL RESULT ==================\n",
      "Iterations: 26\n",
      "Delta: 9.511968687869743e-06\n",
      "Gamma: 0.999\n",
      "Epsilon: 0.01\n",
      "===================================================\n",
      "[0.80796341 0.86539911 0.91653199 1.        ]\n",
      "[ 0.75696613  0.          0.65836281 -1.        ]\n",
      "[0.69968168 0.64881721 0.60471137 0.3814863 ]\n",
      "===================================================\n"
     ]
    }
   ],
   "source": [
    "def main():\n",
    "    \n",
    "    tot_states = 12\n",
    "    gamma = 0.999 \n",
    "    iteration = 0 #Iteration counter\n",
    "    epsilon = 0.01 #Stopping criteria given a small value\n",
    "\n",
    "    #List containing the data for each iteation\n",
    "    graph_list = list()\n",
    "\n",
    "    #Transition matrix loaded from file\n",
    "    T = np.load(\"T.npy\")\n",
    "\n",
    "    #Reward vector\n",
    "    r = np.array([-0.04, -0.04, -0.04,  +1.0,\n",
    "                  -0.04,   0.0, -0.04,  -1.0,\n",
    "                  -0.04, -0.04, -0.04, -0.04])    \n",
    "\n",
    "    #Utility vectors\n",
    "    u = np.array([0.0, 0.0, 0.0,  0.0,\n",
    "                   0.0, 0.0, 0.0,  0.0,\n",
    "                   0.0, 0.0, 0.0,  0.0])\n",
    "    \n",
    "    u1 = np.array([0.0, 0.0, 0.0,  0.0,\n",
    "                    0.0, 0.0, 0.0,  0.0,\n",
    "                    0.0, 0.0, 0.0,  0.0])\n",
    "\n",
    "    while True:\n",
    "        delta = 0\n",
    "        u = u1.copy()\n",
    "        iteration += 1\n",
    "        graph_list.append(u)\n",
    "        for s in range(tot_states):\n",
    "            reward = r[s]\n",
    "            v = np.zeros((1,tot_states))\n",
    "            v[0,s] = 1.0\n",
    "            u1[s] = state_utility(v, T, u, reward, gamma)\n",
    "            delta = max(delta, np.abs(u1[s] - u[s])) #Stopping criteria checked    \n",
    "            \n",
    "        if delta < epsilon * (1 - gamma) / gamma:\n",
    "                print(\"=================== FINAL RESULT ==================\")\n",
    "                print(\"Iterations: \" + str(iteration))\n",
    "                print(\"Delta: \" + str(delta))\n",
    "                print(\"Gamma: \" + str(gamma))\n",
    "                print(\"Epsilon: \" + str(epsilon))\n",
    "                print(\"===================================================\")\n",
    "                print(u[0:4])\n",
    "                print(u[4:8])\n",
    "                print(u[8:12])\n",
    "                print(\"===================================================\")\n",
    "                break\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    main()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
